Median income

median_income = read_excel("./data/MedianZIP-3.xlsx") %>% 
  janitor::clean_names() %>% 
  rename(zipcode = zip)

Data resources: https://www.psc.isr.umich.edu/dis/census/Features/tract2zip/ It is the data of 2010

May_newcases = read_csv("./data/May_newcases")
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   zipcode = col_double(),
##   day = col_character(),
##   positive = col_double(),
##   total = col_double(),
##   zcta_cum_perc_pos = col_double(),
##   neighborhood_name = col_character(),
##   borough_group = col_character(),
##   covid_case_rate = col_double(),
##   pop_denominator = col_double(),
##   covid_death_count = col_double(),
##   covid_death_rate = col_double(),
##   percent_positive = col_double(),
##   newcases_day = col_double()
## )
New_York_City_zip = May_newcases %>% 
  distinct(zipcode)

median_income_nyc = left_join(New_York_City_zip, median_income)
## Joining, by = "zipcode"
#write.csv(median_income_nyc,file="data/predictor_data/median_income_nyc.csv")

Race from census bureau

race_by_zipcode = read_excel("./data/race_by_zipcode.xlsx",skip = 1) %>% 
  select(-id) %>% 
  janitor::clean_names() %>% 
  separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>% 
  separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>% 
  select(-ZCTA, -state) %>% 
  mutate(zipcode = as.numeric(zipcode))

race_by_zipcode_nyc = left_join(New_York_City_zip, race_by_zipcode)
## Joining, by = "zipcode"
colnames(race_by_zipcode_nyc) <- str_replace(colnames(race_by_zipcode_nyc), "total_", "")
#write.csv(race_by_zipcode_nyc,file="data/predictor_data/race_by_zipcode_nyc.csv")

sex by age from census bureau

sex_by_age = read_excel("./data/sex_by_age.xlsx", skip = 1) %>% 
  janitor::clean_names()  %>% 
  separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>% 
  separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>% 
  select(-ZCTA, -state, -id) %>% 
  mutate(zipcode = as.numeric(zipcode))

sex_by_age_zipcode_nyc = left_join(New_York_City_zip, sex_by_age)
## Joining, by = "zipcode"
colnames(sex_by_age_zipcode_nyc) <- str_replace(colnames(sex_by_age_zipcode_nyc), "total_", "")

male = sex_by_age_zipcode_nyc %>% 
  select(zipcode, starts_with("male")) %>% 
  mutate(sex = rep("male", nrow(.))) %>% 
  rename(total = male)
colnames(male) <- str_replace(colnames(male), "male_", "")
colnames(male) <- str_replace(colnames(male), "_years", "")

female = sex_by_age_zipcode_nyc %>% 
  select(zipcode, starts_with("female")) %>% 
  mutate(sex = rep("female", nrow(.)))%>% 
  rename(total = female)
colnames(female) <- str_replace(colnames(male), "female_", "")
colnames(female) <- str_replace(colnames(male), "_years", "")

sex_age_zipcode_nyc = rbind(male, female) %>% 
  select(zipcode, total, sex, everything())

#write.csv(sex_age_zipcode_nyc,file="data/predictor_data/sex_by_age_zipcode_nyc.csv")

Household

household = read_excel("./data/household.xlsx", skip = 1)%>% 
  janitor::clean_names()  %>% 
  separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>% 
  separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>% 
  select(-ZCTA, -state, -id) %>% 
  mutate(zipcode = as.numeric(zipcode))

household_zipcode_nyc = left_join(New_York_City_zip,household)
## Joining, by = "zipcode"
colnames(household_zipcode_nyc) <- str_replace(colnames(household_zipcode_nyc), "total_", "")
colnames(household_zipcode_nyc) <- str_replace(colnames(household_zipcode_nyc), "_person_household", "")
# write.csv(household_zipcode_nyc,file="data/predictor_data/household_zipcode_nyc.csv")

Zipcode map data

latitude and longitude.

https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/export/?q=NY&refine.state=NY

zipcode_map = read_xlsx("./data/us-zip-code-latitude-and-longitude.xlsx",skip = 1) %>% 
  janitor::clean_names() %>% 
  rename(zipcode = zip)

zipcode_map_nyc = left_join(New_York_City_zip, zipcode_map)
## Joining, by = "zipcode"
zipcode_map_nyc = zipcode_map_nyc %>% select(zipcode,latitude,longitude)
r <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
summary(nyc_neighborhoods)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x -74.25559 -73.70001
## y  40.49613  40.91553
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Data attributes:
##           neighborhood boroughCode          borough  
##  Jamaica Bay    : 16   1:38        Bronx        :75  
##  Pelham Islands : 14   2:74        Brooklyn     :71  
##  Broad Channel  : 10   3:67        Manhattan    :37  
##  Marine Park    :  3   4:77        Queens       :73  
##  Pelham Bay Park:  3   5:54        Staten Island:54  
##  Bayswater      :  2                                 
##  (Other)        :262                                 
##                                                                X.id    
##  http://nyc.pediacities.com/Resource/Neighborhood/Jamaica_Bay    : 16  
##  http://nyc.pediacities.com/Resource/Neighborhood/Pelham_Islands : 14  
##  http://nyc.pediacities.com/Resource/Neighborhood/Broad_Channel  : 10  
##  http://nyc.pediacities.com/Resource/Neighborhood/Marine_Park    :  3  
##  http://nyc.pediacities.com/Resource/Neighborhood/Pelham_Bay_Park:  3  
##  http://nyc.pediacities.com/Resource/Neighborhood/Bayswater      :  2  
##  (Other)                                                         :262
leaflet(nyc_neighborhoods) %>%
  addTiles() %>% 
  addPolygons(popup = ~neighborhood) %>%
  addProviderTiles("CartoDB.Positron")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228

Use Median income to draw the map

library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:tidyr':
## 
##     extract
library(dplyr)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(tmap)    # for static and interactive maps
library(leaflet) # for interactive maps
library(ggplot2) # tidyverse data visualization package
library(shiny)   # for web applications
median_income_nyc
## # A tibble: 177 x 4
##    zipcode  median    mean   pop
##      <dbl>   <dbl>   <dbl> <dbl>
##  1   10001  71245. 123113. 17678
##  2   10002  30844.  46259. 70878
##  3   10003  89999. 139331. 53609
##  4   10004 110184. 156683.  1271
##  5   10005 115133. 163763.  1517
##  6   10006 111220  156776    972
##  7   10007 145459. 256236.  3520
##  8   10009  56615.  78138. 56975
##  9   10010  93702. 137106. 27322
## 10   10011  92359. 160937. 45899
## # … with 167 more rows
m <- leaflet() %>% setView(lng = -73.99653, lat = 40.75074, zoom = 12)
m %>% addTiles()
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
r <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
leaflet(nyc_neighborhoods) %>%
  addTiles() %>% 
  addPolygons(stroke = FALSE, smoothFactor = 0.2, fillOpacity = 1,popup = ~neighborhood) %>%
  addProviderTiles("CartoDB.Positron")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
data0614 = read.csv("./data/June/0614/data-by-modzcta0614.csv") %>% 
  drop_na()%>% janitor::clean_names() %>% 
  rename(positive = covid_case_count,
         MODZCTA = modified_zcta) %>% 
  dplyr::select(MODZCTA,positive)

data0614_death = read.csv("./data/June/0614/data-by-modzcta0614.csv") %>% 
  drop_na()%>% janitor::clean_names() %>% 
  rename(positive = covid_case_count,
         MODZCTA = modified_zcta) %>% 
  dplyr::select(MODZCTA,covid_death_count)
spdf = rgdal::readOGR("Geography-resources/MODZCTA_2010_WGS1984.geo.json")
## OGR data source with driver: GeoJSON 
## Source: "/Users/ziqizhou/Desktop/MSPH_1st_Year/P8105_Data_Science/Git/NYC_covid-19.github.io/Geography-resources/MODZCTA_2010_WGS1984.geo.json", layer: "MODZCTA_2010_WGS1984.geo"
## with 178 features
## It has 2 fields
spdf@data
##     MODZCTA        label
## 0     10001 10001, 10118
## 1     10002        10002
## 2     10003        10003
## 3     10004        10004
## 4     10005        10005
## 5     10006        10006
## 6     10007        10007
## 7     10009        10009
## 8     10010        10010
## 9     10011        10011
## 10    10012        10012
## 11    10013        10013
## 12    10014        10014
## 13    10016        10016
## 14    10017        10017
## 15    10018        10018
## 16    10019 10019, 10020
## 17    10021        10021
## 18    10022        10022
## 19    10023        10023
## 20    10024        10024
## 21    10025        10025
## 22    10026        10026
## 23    10027        10027
## 24    10028        10028
## 25    10029        10029
## 26    10030        10030
## 27    10031        10031
## 28    10032        10032
## 29    10033        10033
## 30    10034        10034
## 31    10035        10035
## 32    10036        10036
## 33    10037        10037
## 34    10038        10038
## 35    10039        10039
## 36    10040        10040
## 37    10044        10044
## 38    10065        10065
## 39    10069        10069
## 40    10075 10075, 10162
## 41    10128        10128
## 42    10280        10280
## 43    10282        10282
## 44    10301        10301
## 45    10302        10302
## 46    10303        10303
## 47    10304        10304
## 48    10305        10305
## 49    10306        10306
## 50    10307        10307
## 51    10308        10308
## 52    10309        10309
## 53    10310        10310
## 54    10312        10312
## 55    10314        10314
## 56    10451        10451
## 57    10452        10452
## 58    10453        10453
## 59    10454        10454
## 60    10455        10455
## 61    10456        10456
## 62    10457        10457
## 63    10458        10458
## 64    10459        10459
## 65    10460        10460
## 66    10461        10461
## 67    10462        10462
## 68    10463        10463
## 69    10464        10464
## 70    10465        10465
## 71    10466        10466
## 72    10467        10467
## 73    10468        10468
## 74    10469        10469
## 75    10470        10470
## 76    10471        10471
## 77    10472        10472
## 78    10473        10473
## 79    10474        10474
## 80    10475        10475
## 81    11004 11004, 11005
## 82    11101        11101
## 83    11102        11102
## 84    11103        11103
## 85    11104        11104
## 86    11105        11105
## 87    11106        11106
## 88    11109        11109
## 89    11201        11201
## 90    11203        11203
## 91    11204        11204
## 92    11205        11205
## 93    11206        11206
## 94    11207        11207
## 95    11208        11208
## 96    11209        11209
## 97    11210        11210
## 98    11211 11211, 11249
## 99    11212        11212
## 100   11213        11213
## 101   11214        11214
## 102   11215        11215
## 103   11216        11216
## 104   11217 11217, 11243
## 105   11218        11218
## 106   11219        11219
## 107   11220        11220
## 108   11221        11221
## 109   11222        11222
## 110   11223        11223
## 111   11224        11224
## 112   11225        11225
## 113   11226        11226
## 114   11228        11228
## 115   11229        11229
## 116   11230        11230
## 117   11231        11231
## 118   11232        11232
## 119   11233        11233
## 120   11234        11234
## 121   11235        11235
## 122   11236        11236
## 123   11237        11237
## 124   11238        11238
## 125   11239        11239
## 126   11354        11354
## 127   11355        11355
## 128   11356        11356
## 129   11357        11357
## 130   11358        11358
## 131   11360        11360
## 132   11361        11361
## 133   11362        11362
## 134   11363        11363
## 135   11364        11364
## 136   11365        11365
## 137   11366        11366
## 138   11367        11367
## 139   11368        11368
## 140   11369        11369
## 141   11370        11370
## 142   11372        11372
## 143   11373        11373
## 144   11374        11374
## 145   11375        11375
## 146   11377        11377
## 147   11378        11378
## 148   11379        11379
## 149   11385        11385
## 150   11411        11411
## 151   11412        11412
## 152   11413        11413
## 153   11414        11414
## 154   11415        11415
## 155   11416        11416
## 156   11417        11417
## 157   11418        11418
## 158   11419        11419
## 159   11420        11420
## 160   11421        11421
## 161   11422        11422
## 162   11423        11423
## 163   11426        11426
## 164   11427        11427
## 165   11428        11428
## 166   11429        11429
## 167   11432        11432
## 168   11433        11433
## 169   11434        11434
## 170   11435        11435
## 171   11436        11436
## 172   11691        11691
## 173   11692        11692
## 174   11693        11693
## 175   11694        11694
## 176   11697        11697
## 177   99999
posi_map = geo_join(spdf,data0614,"MODZCTA","MODZCTA")

pal <- colorNumeric("Greens", domain=posi_map$positive)

# Getting rid of rows with NA values
posi_map <- subset(posi_map, !is.na(positive))

# Setting up the pop up text
popup_sb <- paste0("Positive Cases: ", as.character(posi_map$positive), 
                   "  Neighborhood: ", as.character(posi_map$neighborhood_name),
                   "  Boro: ",as.character(posi_map$borough_group),sep = ",")

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>% 
  addPolygons(data = posi_map , 
              fillColor = ~pal(posi_map$positive), 
              fillOpacity = 0.7, 
              weight = 0.2, 
              smoothFactor = 0.2, 
              popup = ~popup_sb) %>%
  addLegend(pal = pal, 
            values = posi_map$positive, 
            position = "bottomright", 
            title = "Cumulative cases")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
median_income_map = geo_join(spdf, median_income_nyc,"MODZCTA", "zipcode")
pal_med = colorNumeric(palette="YlOrRd", domain=median_income_map@data$median, na.color="transparent")

median_income_map = subset(median_income_map, !is.na(median))

popup_medi_sb <- paste0("Median Income: ", as.character(median_income_map$median))

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>% 
  addPolygons(data = median_income_map , 
              fillColor = ~pal_med(median_income_map$median), 
              fillOpacity = 0.7, 
              weight = 0.2, 
              smoothFactor = 0.2, 
              popup = ~popup_medi_sb) %>%
  addLegend(pal = pal_med, 
            values = median_income_map$median, 
            position = "bottomright", 
            title = "Median Annual Income")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
death_map = geo_join(spdf,data0614_death,"MODZCTA","MODZCTA")
  
pal_dea <- colorNumeric("Reds", domain=death_map$covid_death_count)

# Getting rid of rows with NA values
death_map <- subset(death_map, !is.na(covid_death_count))

# Setting up the pop up text
popup_dea_sb <- paste0("Death Count: ", as.character(death_map$covid_death_count))

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>% 
  addPolygons(data = death_map , 
              fillColor = ~pal_dea(death_map$covid_death_count), 
              fillOpacity = 0.7, 
              weight = 0.2, 
              smoothFactor = 0.2, 
              popup = ~popup_dea_sb) %>%
  addLegend(pal = pal_dea, 
            values = death_map$covid_death_count, 
            position = "bottomright", 
            title = "Death Count")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
library(broom)
spdf_fortified <- tidy(spdf)
## Regions defined for each Polygons
# Plot it
ggplot() +
  geom_polygon(data = spdf_fortified, aes( x = long, y = lat, group = group), fill="#69b3a2", color="white") +
  theme_void() +
  coord_map()